# -*- coding: utf-8 -*-
'''
Created on 2011. 10. 27.

@author: user
'''
from nltk.text import Text
import PlayNLTK
import nltk
import os
import sys

def read_sentences(path, column=2):
    """Collect one tab-separated field from every line of a text file.

    Each line of ``path`` is split on tab characters and the field at
    index ``column`` is collected. Lines that have too few fields (for
    example blank lines) are skipped instead of raising ``IndexError``.

    Args:
        path: Path of the tab-separated text file to read.
        column: Zero-based, non-negative index of the field to extract.
            Defaults to 2, i.e. the third field.

    Returns:
        A list with the selected field of every sufficiently long line, in
        file order. A field that is the last one on its line keeps the
        line's trailing newline, because lines are not stripped.

    Raises:
        IOError: If the file cannot be opened or read.
    """
    sentences = []
    # The context manager closes the handle even if reading fails part-way.
    with open(path) as source:
        for line in source:
            fields = line.split('\t')
            if len(fields) > column:
                sentences.append(fields[column])
    return sentences


if __name__ == '__main__':
    fname = '../data/00_Annotation_EA.txt'

    try:
        sentences = read_sentences(fname)
    except IOError:
        # The message reads "Cannot open the file". As before, the script
        # keeps going with no sentences rather than aborting.
        sys.stderr.write('파일을 열 수 없습니다\n')
        sentences = []

    # The actual processing starts here.
    # Join with a space so the last word of one sentence cannot fuse with
    # the first word of the next one when the field carries no trailing
    # newline; the extra whitespace is not expected to add tokens.
    raw = ' '.join(sentences)

    tokens = nltk.word_tokenize(raw)
    text = nltk.Text(tokens)

    # Single-argument call form prints the same thing on Python 2 and 3.
    print(text)
    
